from dataPath import original_datapath,cleaned_filepath

# Data-cleaning function
def cleanDate():
    """Strip the leading fields and URL tokens from the raw search log.

    Reads ``original_datapath`` in binary mode. For every input line the
    first four whitespace-separated fields are dropped; each remaining field
    is decoded, fields starting with ``"http"`` are discarded, and the
    surviving fields are written to ``cleaned_filepath`` (UTF-8), each one
    followed by six spaces. Exactly one output line is produced per input
    line, even when no field survives.

    Both files are managed by a ``with`` statement so they are closed (and
    the output flushed) even if decoding or writing fails part-way through.
    """
    field_suffix = '      '
    print("正在进行数据清洗")
    with open(original_datapath, 'rb') as source, \
            open(cleaned_filepath, 'w', encoding='utf-8') as target:
        # Iterate the file lazily instead of loading every line up front.
        for raw_line in source:
            kept_fields = []
            # Split on the raw bytes first and decode each field afterwards,
            # so tokenisation only ever happens on ASCII whitespace.
            for raw_field in raw_line.split()[4:]:
                # NOTE(review): 'ANSI' is an alias of the 'mbcs' codec, which
                # exists on Windows only and follows the system code page;
                # on other platforms this lookup raises LookupError. Confirm
                # the real encoding of the log before making this portable.
                field = raw_field.decode('ANSI')
                if not field.startswith("http"):
                    kept_fields.append(field + field_suffix)
            target.write(''.join(kept_fields) + '\n')
    print("已完成数据清洗")
